*******************************************************************************
* "Corruption and Political Support: The Case of Peru's Vacuna-gate Scandal"
* By Oscar Castorena, Noam Lupu, Adam Wolsky, and Elizabeth J. Zechmeister
* This do-file replicates the results in the main paper and the appendix
*******************************************************************************

* Start from an empty session and switch off -more- pauses
clear
set more off

* Install every community-contributed package this do-file calls, not only
* coefplot, so the script also runs on a clean Stata installation:
*   coefplot     -> coefplot
*   estout       -> eststo, esttab, estpost, estadd
*   cem          -> cem, imb
*   lincomest    -> lincomest
*   blindschemes -> scheme(plotplain)
foreach pkg in coefplot estout cem lincomest blindschemes {
	ssc install `pkg', replace
}

* Load the survey data
use "Peru2018AB.dta", clear

* Distribution of interview dates
hist fecha, discrete freq scheme(plotplain)

* Scandal date (Feb. 11, 2021), stored once so every comparison uses the same value
local scandal_date = date("11/02/2021", "DMY")

* defining treatment as interviews conducted after Feb. 11, 2021
* (1 = after the scandal date, 0 = on or before it).
* Stata treats missing values as larger than any number, so a bare
* `fecha > date' would code interviews with a missing date as treated;
* the !missing() guard leaves those observations missing instead.
gen treatment = (fecha > `scandal_date') if !missing(fecha)

* days to/since scandal (negative = before, 0 = Feb. 11th, positive = after)
gen scandal_time_diff = fecha - `scandal_date'
hist scandal_time_diff, discrete freq scheme(plotplain) xtitle("Days to/since scandal (0=Feb. 11th)")

* Lima province indicator:
*   1 when prov1t equals 1100014, 0 for every other value,
*   and missing when prov1t is one of the extended missing codes .a / .b
generate lima = (prov1t == 1100014) if !inlist(prov1t, .a, .b)
label define lima 0 "Non-Lima" 1 "Lima"
label values lima lima
label variable lima "Lima province"
tabulate lima

* Value labels for the urban/rural variable
label define ur 1 "City" 2 "City outskirts" 3 "Town in rural area" 4 "Rural area"
label values ur1new ur

* Education: fold category 0 into category 1 and label the three levels
recode edr (0 = 1), generate(edr_r)
label define ed 1 "Primary" 2 "Secondary" 3 "Tertiary or higher"
label values edr_r ed

* Gender: female indicator built from q1tb (2 -> 1; 1 and 3 -> 0)
recode q1tb (2 = 1 "Female") (1 3 = 0 "Not Female"), generate(mujer)
label variable mujer "Female"

* Education dummies derived from edr
* (0/1 -> none or primary, 2 -> secondary, 3 -> tertiary or higher)
recode edr (0 1 = 1) (2 3 = 0), generate(edr_1)
label variable edr_1 "None or Primary Ed."
recode edr (2 = 1) (0 1 3 = 0), generate(edr_2)
label variable edr_2 "Secondary Ed."
recode edr (3 = 1) (0 1 2 = 0), generate(edr_3)
label variable edr_3 "Tertiary/Higher Ed."

* Age-group dummies: copy edad, attach the edad_eng value label,
* and let -tabulate- create one indicator per category (edad_1, edad_2, ...)
generate age = edad
label values age edad_eng
tabulate age, generate(edad_)

* Urban/rural dummies (ur1new_1 ... ur1new_4) and their variable labels
tabulate ur1new, generate(ur1new_)
local k = 0
foreach txt in "Live in a city" "Live on outskirts of city" "Live in a town in rural area" "Live in a rural area" {
	local ++k
	label variable ur1new_`k' "`txt'"
}

* Perceptions of corruption and support for democracy, rescaled so that
* a raw value of 1 maps to 0 and a raw value of 5 (exc7new) / 7 (ing4) maps to 100
generate exc7new_recode = (exc7new - 1) / 4 * 100
label variable exc7new_recode "Perceptions of Corruption"
generate ing4_recode = (ing4 - 1) / 6 * 100
label variable ing4_recode "Support for Democracy"

* System support: principal-component factor score of b2, b3, b4 and b6,
* then stretched linearly so the sample minimum is 0 and the maximum is 100
factor b2 b3 b4 b6, pcf
rotate
predict sys_sup
quietly summarize sys_sup
generate sys_sup_recode = 100 / (r(max) - r(min)) * (sys_sup - r(min))
label variable sys_sup_recode "System Support"

* Other democracy items: turn the 1/2 coding into 1/0 (2 becomes 0)
recode jc13 (2 = 0), generate(jc13_recode)
label variable jc13_recode "Coup Justified"
recode jc15a (2 = 0), generate(jc15_recode)

*** Matching

* Location covariates shared by the imbalance check and the matching step
local place lima ur1new_1 ur1new_2 ur1new_3 ur1new_4

* Imbalance between treatment and control before any CEM weights exist
imb mujer edad edr `place', treatment(treatment)

* Coarsened exact matching with explicit cutpoints for edad and edr;
* this reports the L1 distance statistic, and the cem_weights variable
* used in the weighted models below is presumably created here (it is not
* generated anywhere else in this file)
cem mujer edad (1 2 3 4 5 6) edr (1 2 3) `place', treatment(treatment)

* FIGURE 1 Effect of Corruption Scandal on Public Opinion
* testing for negligible effects
*
* Each panel plots the 1.treatment coefficient from five specifications
* (a = CEM weights, b = no weights, c = no weights + controls,
*  d = survey design, e = survey design + controls) with 95% and 90%
* intervals, plus vertical reference lines at zero and at +/- a fixed bound.
*
* NOTE(review): the multi-line axis-label strings are not identical across
* the three panels (leading space before "No weights, with controls" and
* the number of blank spacer lines differ). They are kept verbatim here.

local controls i.mujer i.edad i.edr_r i.ur1new i.lima

* system support
eststo clear
eststo a: regress sys_sup_recode i.treatment [iweight = cem_weights]
eststo b: regress sys_sup_recode i.treatment
eststo c: regress sys_sup_recode i.treatment `controls'
eststo d: svy: regress sys_sup_recode i.treatment
eststo e: svy: regress sys_sup_recode i.treatment `controls'

coefplot a b c d e, keep(1.treatment) levels(95 90) ///
	msymbol(circle) mcolor(black) ciopts(color(black)) ///
	coeflabels(1.treatment = `""CEM weights" " " " " " " " " " " "No weights" " " " " " " " " " " "No weights, with controls" " " " " " " " " " " "Survey weights" " " " " " " " " " " " " "Survey weights, with controls""') ///
	ylabel(, notick labgap(0)) xlabel(-7 -5 -3 -1 1 3 5 7) ///
	xline(0, lpattern(solid)) xline(-3.35 3.35) ///
	xtitle("Effect of corruption scandal") title("System support") ///
	legend(off) scheme(plotplain) plotregion(lcolor(black)) ///
	saving(meaningful_effect_syssup.gph, replace)

* support for democracy
eststo clear
eststo a: regress ing4_recode i.treatment [iweight = cem_weights]
eststo b: regress ing4_recode i.treatment
eststo c: regress ing4_recode i.treatment `controls'
eststo d: svy: regress ing4_recode i.treatment
eststo e: svy: regress ing4_recode i.treatment `controls'

coefplot a b c d e, keep(1.treatment) levels(95 90) ///
	msymbol(circle) mcolor(black) ciopts(color(black)) ///
	coeflabels(1.treatment = `""CEM weights" " " " " " " " " " " "No weights" " " " " " " " " " " " No weights, with controls" " " " " " " " " " " "Survey weights" " " " " " " " " " " " " "Survey weights, with controls""') ///
	ylabel(, notick labgap(0)) xlabel(-7 -5 -3 -1 1 3 5 7) ///
	xline(0, lpattern(solid)) xline(-4.49 4.49) ///
	xtitle("Effect of corruption scandal") title("Support for democracy") ///
	legend(off) scheme(plotplain) plotregion(lcolor(black)) ///
	saving(meaningful_effect_ing4.gph, replace)

* perception of corruption
eststo clear
eststo a: regress exc7new_recode i.treatment [iweight = cem_weights]
eststo b: regress exc7new_recode i.treatment
eststo c: regress exc7new_recode i.treatment `controls'
eststo d: svy: regress exc7new_recode i.treatment
eststo e: svy: regress exc7new_recode i.treatment `controls'

coefplot a b c d e, keep(1.treatment) levels(95 90) ///
	msymbol(circle) mcolor(black) ciopts(lcolor(black)) ///
	coeflabels(1.treatment = `""CEM weights" " " " " " " " " " " "No weights" " " " " " " " " " " " No weights, with controls" " " " " " " " " " " "Survey weights" " " " " " " " " " " "Survey weights, with controls""') ///
	ylabel(, notick labgap(0)) xlabel(-7 -5 -3 -1 1 3 5 7) ///
	xline(0, lpattern(solid)) xline(-2.74 2.74) ///
	xtitle("Effect of corruption scandal") title("Corruption perceptions") ///
	legend(off) scheme(plotplain) plotregion(lcolor(black)) ///
	saving(meaningful_effect_exc7new.gph, replace)

* Put the three saved panels side by side in a single row
graph combine meaningful_effect_exc7new.gph meaningful_effect_syssup.gph meaningful_effect_ing4.gph, ///
	row(1) scheme(plotplain) xsize(8)

* Appendix

* Table A1
* Regional distribution of interviews

* pre and post filter period
* (1 = interviewed on or after 9 March 2021, 0 = before).
* Stata treats missing values as larger than any number, so a bare
* `fecha >= date' would put interviews with a missing date in the
* post-filter group; the !missing() guard leaves them missing instead.
gen filter = (fecha >= date("9/03/2021", "DMY")) if !missing(fecha)

* Province by filter period, with column percentages
tab prov1t filter, col

* Table A2
* Balance Tests
* Two-sample tests of proportions (control vs. treatment) for every
* demographic indicator, exported as an RTF table
eststo clear

local balance_vars mujer ///
	edad_1 edad_2 edad_3 edad_4 edad_5 edad_6 ///
	edr_1 edr_2 edr_3 ///
	ur1new_1 ur1new_2 ur1new_3 ur1new_4 ///
	lima
estpost prtest `balance_vars', by(treatment)
esttab using balanceTest.rtf, replace nonumber label varwidth(25) ///
	cells("P_1(fmt(%12.3f)) P_2(fmt(%12.3f)) b(fmt(%12.3f)) p(fmt(%12.3f))")

* Table A3
* Treatment and Control Groups After Coarsened Exact Matching
eststo clear

* For each indicator, estimate CEM-weighted proportions by treatment group
* and keep a 2x2 matrix named after the variable:
*   rows    = estimate, standard error (rows 1-2 of r(table))
*   columns = columns 3 and 4 of r(table), labelled Control / Treatment
* NOTE(review): columns 3-4 are presumably the "indicator == 1" cells for
* treatment == 0 and treatment == 1; confirm the r(table) column order for
* the Stata version in use.
foreach x in mujer edad_1 edad_2 edad_3 edad_4 edad_5 edad_6 ///
 edr_1 edr_2 edr_3 lima ur1new_1 ur1new_2 ur1new_3 ur1new_4 {
	prop `x' [iweight  = cem_weights], over(treatment)
	matrix `x' = r(table)
	matrix `x' = `x'[1...,3], `x'[1...,4]
	matrix colnames `x' = ":Control" ":Treatment"
	matrix rownames `x' = `x' se
	matrix `x' = `x'[1..2,1...]
}

/* Extracting each proportion value and standard error from the per-variable
matrices into four 1x15 row vectors (columns c1..c15) to format the table nicely */

matrix control = mujer[1,1],edad_1[1,1],edad_2[1,1],edad_3[1,1],edad_4[1,1], ///
edad_5[1,1],edad_6[1,1],edr_1[1,1],edr_2[1,1],edr_3[1,1],lima[1,1],ur1new_1[1,1], ///
ur1new_2[1,1],ur1new_3[1,1],ur1new_4[1,1]

matrix secontrol = mujer[2,1],edad_1[2,1],edad_2[2,1],edad_3[2,1],edad_4[2,1], ///
edad_5[2,1],edad_6[2,1],edr_1[2,1],edr_2[2,1],edr_3[2,1],lima[2,1],ur1new_1[2,1], ///
ur1new_2[2,1],ur1new_3[2,1],ur1new_4[2,1]

matrix treatment = mujer[1,2],edad_1[1,2],edad_2[1,2],edad_3[1,2],edad_4[1,2], ///
edad_5[1,2],edad_6[1,2],edr_1[1,2],edr_2[1,2],edr_3[1,2],lima[1,2],ur1new_1[1,2], ///
ur1new_2[1,2],ur1new_3[1,2],ur1new_4[1,2]

matrix setreatment = mujer[2,2],edad_1[2,2],edad_2[2,2],edad_3[2,2],edad_4[2,2], ///
edad_5[2,2],edad_6[2,2],edr_1[2,2],edr_2[2,2],edr_3[2,2],lima[2,2],ur1new_1[2,2], ///
ur1new_2[2,2],ur1new_3[2,2],ur1new_4[2,2]

/* Attaching the proportion and standard-error vectors to the active
estimation results (the last -prop- run in the loop) so esttab can print them */
estadd matrix control
estadd matrix secontrol
estadd matrix treatment
estadd matrix setreatment

/* Table contains the proportions of variables and puts standard error values below in 
parentheses in table. Row labels follow the column order of the vectors above. */
esttab using balance_cem.rtf, ///
cells("control(fmt(3)) treatment" "secontrol(par) setreatment(par)") ///
collabels("Control" "Treatment") ///
coeflabels(c1 "Female" c2 "18-25" c3 "26-35" c4 "36-45" c5 "46-55" c6 "56-65" c7 "66+" ///
c8 "Primary or less" c9 "Secondary" c10 "Tertiary" c11 "Lima Province" c12 "City" ///
c13 "Outskirts of City" c14 "Town" c15 "Rural Area") noobs ///
addnotes(Note: Standard errors in parentheses.) nomtitles nonumbers replace


* Table A4
* Item Nonresponse with CEM Weights

* Missing-data flags for the three dependent variables (1 = missing)
generate exc7new_miss = missing(exc7new_recode)
generate ing4_miss    = missing(ing4_recode)
generate sys_sup_miss = missing(sys_sup_recode)

* Raw missingness; exc7new and system support are tabulated only within
* the questionnaire split named in the if-condition
tabulate exc7new_miss if core_a_core_b == "Core B"
tabulate sys_sup_miss if core_a_core_b == "Core A"
tabulate ing4_miss

eststo clear

* CEM-weighted share missing by treatment group, followed by the
* control-minus-treatment difference in that share
proportion exc7new_miss [iweight = cem_weights] if core_a_core_b == "Core B", over(treatment)
eststo miss_a: lincomest _b[1.exc7new_miss@0.treatment] - _b[1.exc7new_miss@1.treatment]

proportion sys_sup_miss [iweight = cem_weights] if core_a_core_b == "Core A", over(treatment)
eststo miss_b: lincomest _b[1.sys_sup_miss@0.treatment] - _b[1.sys_sup_miss@1.treatment]

proportion ing4_miss [iweight = cem_weights], over(treatment)
eststo miss_c: lincomest _b[1.ing4_miss@0.treatment] - _b[1.ing4_miss@1.treatment]

esttab using missproptest.rtf, replace se star(+ 0.10 * 0.05 ** 0.01) ///
	label nobaselevels nogaps nomtitles noomitted

* Figure A3
* Density plots pre- and post-scandal
* Each panel overlays the pre-scandal distribution (green) with the
* post-scandal distribution (black outline) and is saved as a .gph file.

* Panel 1: support for democracy (raw ing4)
twoway ///
	(histogram ing4 if treatment == 0, discrete color(green)) ///
	(histogram ing4 if treatment == 1, discrete fcolor(none) lcolor(black)), ///
	legend(order(1 "Pre-Scandal" 2 "Post-Scandal")) scheme(plotplain) ///
	xlabel(1(1)7) saving(ing4.gph, replace)

* Panel 2: perceptions of corruption (raw exc7new)
twoway ///
	(histogram exc7new if treatment == 0, discrete color(green)) ///
	(histogram exc7new if treatment == 1, discrete fcolor(none) lcolor(black)), ///
	legend(order(1 "Pre-Scandal" 2 "Post-Scandal")) scheme(plotplain) ///
	xlabel(1 "None" 2 `""Less than" "half of them""' 3 `""Half of" "politicians""' 4 `""More than" "half of them""' 5 "All") ///
	saving(exc7new.gph, replace)

* Panel 3: system support (0-100 scale), kernel densities with bandwidth 5
twoway ///
	(kdensity sys_sup_recode if treatment == 0, bwidth(5) color(green)) ///
	(kdensity sys_sup_recode if treatment == 1, bwidth(5) fcolor(none) lcolor(black)), ///
	legend(order(1 "Pre-Scandal" 2 "Post-Scandal")) scheme(plotplain) ///
	ytitle("Density") xtitle("System Support") saving(sys_sup.gph, replace)

* Stack the three panels in one column and export as PNG
graph combine ing4.gph exc7new.gph sys_sup.gph, scheme(plotplain) col(1) ysize(6) xsize(4)
graph export density_plots.png, as(png) width(1000) replace

* Table A5: Effect of Scandal on Corruption Perceptions   -> exc7new.rtf
* Table A6: Effect of Scandal on System Support            -> syssup.rtf
* Table A7: Effect of Scandal on Support for Democracy     -> ing4.rtf
*
* The same five specifications are fitted for each outcome:
*   a = CEM weights, b = unweighted, c = unweighted + controls,
*   d = survey design, e = survey design + controls
local outcomes exc7new_recode sys_sup_recode ing4_recode
local outfiles exc7new.rtf syssup.rtf ing4.rtf

forvalues k = 1/3 {
	local depvar  : word `k' of `outcomes'
	local outfile : word `k' of `outfiles'

	eststo clear
	eststo a: regress `depvar' i.treatment [iweight = cem_weights]
	eststo b: regress `depvar' i.treatment
	eststo c: regress `depvar' i.treatment i.mujer i.edad i.edr_r i.ur1new i.lima
	eststo d: svy: regress `depvar' i.treatment
	eststo e: svy: regress `depvar' i.treatment i.mujer i.edad i.edr_r i.ur1new i.lima
	esttab using `outfile', replace se star(+ 0.10 * 0.05 ** 0.01) ///
		label nobaselevels nogaps nomtitles noomitted
}

* Table A8
* Effect of Scandal on Individual System Support Items
* Each component of the system-support index is modelled separately.

* Rescale every item with (x - 1) / 6 * 100, so a raw 1 becomes 0 and a raw 7 becomes 100
foreach item in b2 b3 b4 b6 {
	generate `item'_resc = (`item' - 1) / 6 * 100
}

* One CEM-weighted regression per item
eststo clear
eststo a_a: regress b2_resc i.treatment [iweight = cem_weights]
eststo b_a: regress b3_resc i.treatment [iweight = cem_weights]
eststo c_a: regress b4_resc i.treatment [iweight = cem_weights]
eststo d_a: regress b6_resc i.treatment [iweight = cem_weights]
esttab using b_series.rtf, replace se star(+ 0.10 * 0.05 ** 0.01) ///
	label nobaselevels nogaps nomtitles noomitted b(%12.3f) se(%12.3f)

* Table A9:  Effect of Scandal on Tolerance for Military Coup   (jc13) -> jc13_allmodels.rtf
* Table A10: Effect of Scandal on Tolerance for Executive Coup  (jc15) -> jc15_allmodels.rtf
*
* Logit versions of the five specifications
* (a = CEM weights, b = unweighted, c = unweighted + controls,
*  d = survey design, e = survey design + controls)
foreach item in jc13 jc15 {
	eststo clear
	eststo a: logit `item'_recode i.treatment [iweight = cem_weights]
	eststo b: logit `item'_recode i.treatment
	eststo c: logit `item'_recode i.treatment i.mujer i.edad i.edr_r i.ur1new i.lima
	eststo d: svy: logit `item'_recode i.treatment
	eststo e: svy: logit `item'_recode i.treatment i.mujer i.edad i.edr_r i.ur1new i.lima
	esttab using `item'_allmodels.rtf, replace se star(+ 0.10 * 0.05 ** 0.01) ///
		label nobaselevels nogaps nomtitles noomitted b(%12.3f) se(%12.3f)
}

* Table A11: Heterogeneous treatment effects of scandal on corruption perceptions -> het_exc7new.rtf
* Table A12: Heterogeneous treatment effects of scandal on system support          -> het_sys_sup.rtf
* Table A13: Heterogeneous treatment effects of scandal on support for democracy   -> het_ing4.rtf

* Reverse the 1-5 coding of conocim (1 <-> 5, 2 <-> 4, 3 unchanged)
recode conocim (1 = 5) (2 = 4) (3 = 3) (4 = 2) (5 = 1), generate(conocim_resc)

* For each outcome, interact treatment with one moderator at a time
* (a = mujer, b = edad, c = edr_r, d = conocim_resc), all CEM-weighted
foreach stub in exc7new sys_sup ing4 {
	eststo clear
	eststo a: regress `stub'_recode i.treatment##c.mujer [iweight = cem_weights]
	eststo b: regress `stub'_recode i.treatment##c.edad [iweight = cem_weights]
	eststo c: regress `stub'_recode i.treatment##c.edr_r [iweight = cem_weights]
	eststo d: regress `stub'_recode i.treatment##c.conocim_resc [iweight = cem_weights]
	esttab using het_`stub'.rtf, replace se star(+ 0.10 * 0.05 ** 0.01) ///
		label nobaselevels nogaps nomtitles noomitted b(%12.3f) se(%12.3f)
}

* Figure A1
* Time series of the vizcarra column from peruTimeline.csv, with a
* vertical line at 11 Feb 2021. This replaces the survey data in memory.
import delimited "peruTimeline.csv", bindquote(strict) varnames(1) clear

* Convert the string day column (month-day-year order) to a Stata daily date
generate date = date(day, "MDY")
format date %td

* Declare the data as a daily time series
tsset date, daily

twoway (tsline vizcarra), ///
	scheme(plotplain) ytitle("Relative Google searches") ///
	legend(row(1) pos(6)) tline(11feb2021)

